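/*
Question 1

Running this script in spark-shell failed with an error saying that sales_data.csv does not exist
under hdfs://localhost:9000/user/hadoop/, so I uploaded the file to HDFS and then listed the
directory to confirm it was there:
./bin/hdfs dfs -put sales_data.csv hdfs://localhost:9000/user/hadoop/
./bin/hdfs dfs -ls hdfs://localhost:9000/user/hadoop/
*/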

import org.apache.spark.sql.SparkSession
// Reuse the session that is already active (e.g. the one spark-shell provides) or create a new
// one registered under this application name.
val spark = {
  val sessionBuilder = SparkSession.builder.appName("SalesDataExperiment")
  sessionBuilder.getOrCreate()
}
import spark.implicits._

// Load the sales CSV: the first row supplies the column names, and column types are inferred
// from the data. The path is relative, so where it resolves depends on the filesystem Spark is
// configured to use by default.
val salesDF = {
  val csvReader = spark.read.options(Map("header" -> "true", "inferSchema" -> "true"))
  csvReader.csv("sales_data.csv")
}

// Print the inferred schema, then the first five rows.
salesDF.printSchema()
salesDF.show(5)